package com.yaoandw.crawler;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;

/**
 * Static helpers that persist crawled resources below {@link #base_path}.
 *
 * <p>Textual content that the crawler already holds in memory is written as UTF-8;
 * every other resource is downloaded again from its URL and stored byte for byte.
 */
public class SaveFile {
	/** Directory prefix (including the trailing separator) prepended to every generated file name. */
	public static String base_path="d:/work/crawler/";

	/** Regex character class of the characters that are replaced by '_' in generated file names. */
	private static final String ILLEGAL_FILE_NAME_CHARS = "[\\?/:*|<>\"]";

	/** URL scheme prefixes that are removed before a URL is turned into a file name. */
	private static final String[] SCHEME_PREFIXES = { "http://", "https://" };

	/** Size in bytes of the chunk buffer used by {@link #write(InputStream, OutputStream, boolean)}. */
	private static final int COPY_BUFFER_SIZE = 8192;

	/**
	 * Builds the file name under which a page is stored, derived from its URL and content type.
	 *
	 * <p>A leading {@code http://} or {@code https://} (matched case-insensitively) is removed,
	 * each of the characters {@code ? / : * | < > "} is replaced by {@code _}, and
	 * {@code .html} is appended when the content type contains {@code "html"}.
	 *
	 * @param url         the resource URL; must not be null
	 * @param contentType the reported content type; null is treated as "not html"
	 * @return the sanitized file name, without any directory part
	 * @throws NullPointerException if {@code url} is null
	 */
	public static String getFileNameByUrl(String url, String contentType) {
		if (url == null) {
			throw new NullPointerException("url");
		}
		String name = stripScheme(url).replaceAll(ILLEGAL_FILE_NAME_CHARS, "_");
		// text/html and similar types get an explicit extension
		if (contentType != null && contentType.indexOf("html") != -1) {
			return name + ".html";
		}
		// other types (e.g. application/pdf) keep the sanitized URL as-is
		return name;
	}

	/** Returns {@code url} without a leading http/https scheme; other inputs are returned unchanged. */
	private static String stripScheme(String url) {
		for (int i = 0; i < SCHEME_PREFIXES.length; i++) {
			String prefix = SCHEME_PREFIXES[i];
			if (url.regionMatches(true, 0, prefix, 0, prefix.length())) {
				return url.substring(prefix.length());
			}
		}
		return url;
	}

	/**
	 * Stores one crawled resource.
	 *
	 * <p>A non-null content type that does not start with {@code "text"}, or that equals
	 * {@code "text/plain"} exactly, is downloaded again through {@link #saveMediaFile};
	 * everything else (including a null content type) is written from {@code text}.
	 *
	 * @param url         the resource URL
	 * @param contentType the reported content type, may be null
	 * @param text        the already fetched body; only used on the text path
	 */
	public static void saveFile(String url,String contentType,String text){
		boolean binaryLike = contentType != null
				&& (!contentType.startsWith("text") || "text/plain".equals(contentType));
		if (binaryLike) {
			long start = System.currentTimeMillis();
			saveMediaFile(url, contentType);
			System.out.println("2save url:"+url+" cost:"+(System.currentTimeMillis()-start));
		} else {
			saveTextFile(url, contentType, text);
		}
	}

	/**
	 * Writes {@code text} line by line, UTF-8 encoded, to the file derived from {@code url}.
	 * Each line is terminated with the platform line separator. Failures are reported on
	 * stderr and do not propagate; a null {@code text} is skipped.
	 */
	private static void saveTextFile(String url,String contentType,String text){
		if (text == null) {
			System.err.println("No text to save for url:" + url);
			return;
		}
		PrintWriter writer = null;
		try {
			File target = new File(base_path + getFileNameByUrl(url, contentType));
			File parent = target.getParentFile();
			if (parent != null && !parent.exists()) {
				parent.mkdirs();
			}
			writer = new PrintWriter(new BufferedWriter(
					new OutputStreamWriter(new FileOutputStream(target), "UTF-8")));
			BufferedReader lines = new BufferedReader(new StringReader(text));
			String line;
			while ((line = lines.readLine()) != null) {
				writer.println(line);
			}
			// PrintWriter never throws on write errors; it only records them.
			if (writer.checkError()) {
				System.err.println("Error while writing " + target);
			}
		} catch (Exception e) {
			// best effort: a page that cannot be saved must not abort the caller
			e.printStackTrace();
		} finally {
			if (writer != null) {
				writer.close();
			}
		}
	}

	/**
	 * Downloads {@code surl} and stores the raw bytes in the file derived from the URL.
	 *
	 * @param surl        the URL to download; it is passed through {@code UrlTool.processUrl}
	 *                    first (per the original note, to handle URLs containing Chinese characters)
	 * @param contentType the reported content type, used only for the file name
	 * @return {@code true} when the download was written completely, {@code false} when any
	 *         step failed (the failure is printed to stderr)
	 */
	public static boolean saveMediaFile(String surl,String contentType) {
		String targetPath = (base_path + getFileNameByUrl(surl, contentType)).replace("\\", "/");
		InputStream in = null;
		OutputStream out = null;
		try {
			String requestUrl = UrlTool.processUrl(surl);
			URLConnection conn = new URL(requestUrl).openConnection();
			System.out.println(conn.getHeaderFields());
			in = conn.getInputStream();

			// make sure the directory part of the target exists
			int lastSlash = targetPath.lastIndexOf("/");
			if (lastSlash >= 0) {
				File dir = new File(targetPath.substring(0, lastSlash));
				if (!dir.exists()) {
					dir.mkdirs();
				}
			}

			out = new FileOutputStream(targetPath);
			write(in, out, true);
			return true;
		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			// Covers the paths where write() was never reached; closing twice is harmless.
			closeSafely(in);
			closeSafely(out);
		}
	}

	/**
	 * Copies everything from {@code in} to {@code out}.
	 *
	 * @param in      the source stream
	 * @param out     the destination stream
	 * @param isclose when true, both streams are closed afterwards, whether or not the copy succeeded
	 * @throws Exception with message {@code "Write error"} and the original failure as cause when
	 *                   copying fails or, with {@code isclose}, when closing {@code out} fails
	 */
	public static void write(InputStream in, OutputStream out, boolean isclose)
			throws Exception {
		Exception failure = null;
		try {
			byte[] buffer = new byte[COPY_BUFFER_SIZE];
			int count;
			while ((count = in.read(buffer)) != -1) {
				out.write(buffer, 0, count);
			}
		} catch (Exception e) {
			failure = e;
		} finally {
			if (isclose) {
				closeSafely(in);
				// A failing close on the output may mean lost data, so it is reported to the caller.
				try {
					if (out != null) {
						out.close();
					}
				} catch (IOException e) {
					if (failure == null) {
						failure = e;
					}
				}
			}
		}
		if (failure != null) {
			throw new Exception("Write error", failure);
		}
	}

	/** Closes {@code resource} if it is non-null; a failing close is printed, not thrown. */
	private static void closeSafely(Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Manual probe: requests a sample URL without following redirects and prints the
	 * response code and the response headers.
	 */
	public static void main(String[] args) throws IOException{
		// Alternative sample URLs for manual experiments:
//		String surl = "http://7kwx.com/modules/article/txtarticle.php?id=12";// 200: inspect the Content-Disposition header
//		String surl = "http://www.7kwx.com/modules/article/txtarticle.php?id=12";// 301 redirect: inspect the Location header
//		String surl = "http://192.168.1.66/h3c110/security/role.do?action=expExcel";
//		String surl = "http://a.tgbus.com/download/33747/1";// 302 redirect: inspect the Location header
		String surl = "http://www.txdzs.com/n_down.php?i=55913&t=1";
		URL url = new URL(surl);
		HttpURLConnection conn = (HttpURLConnection)url.openConnection();
		try {
			conn.setInstanceFollowRedirects(false);// do not follow redirects automatically
//			conn.addRequestProperty("referer", "http://a.tgbus.com");
			System.out.println(conn.getResponseCode());
			// NOTE(review): re-decodes the header dump as GB2312, presumably because the
			// sample server sends GB2312-encoded header values - confirm.
			System.out.println(new String(conn.getHeaderFields().toString().getBytes("ISO-8859-1"), "GB2312"));
		} finally {
			conn.disconnect();
		}
	}
}
